import huggingface_hub
from huggingface_hub import snapshot_download
from ..smp import *
from .video_base import VideoBaseDataset
from .utils import build_judge, DEBUG_MESSAGE
from ..utils import track_progress_rich
import torchvision.transforms as T
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
from decord import VideoReader, cpu
import imageio
import cv2
import zipfile
import os
import glob
from .utils.mvbench import *

# Sentinel written into judge outputs when the API fails to answer; rows
# containing it are scored -1 and excluded from the VALID rating.
FAIL_MSG = "Failed to obtain answer via API."


class MVBench(VideoBaseDataset):
    """MVBench multi-choice video QA benchmark (OpenGVLab/MVBench).

    Prepares the dataset by downloading the official release from
    HuggingFace (or ModelScope), extracting the per-source video archives,
    and generating a TSV index over the 20 temporal-understanding task
    types. Each sample is rendered either as a video file (for video LLMs)
    or as a fixed number of uniformly sampled frames.
    """

    # MD5 of the generated TSV index; used to validate a cached copy.
    MD5 = "fd21d36522cdedd46d84dc46715ad832"
    # System prompt prepended to every question.
    SYS = """Carefully watch the video and pay attention to the cause and sequence of events, \
the detail and movement of objects, and the action and pose of persons. \
Based on your observations, select the best option that accurately addresses the question.
"""

    TYPE = "Video-MCQ"

    def __init__(self, dataset="MVBench", nframe=0, fps=-1):
        # Task name -> (annotation json, video dir template, data type,
        # whether samples carry start/end bounds). The "your_data_path"
        # prefix is substituted with the local "video" directory when the
        # TSV index is generated. Must be assigned before super().__init__,
        # which triggers prepare_dataset().
        self.type_data_list = {
            "Action Sequence": (
                "action_sequence.json",
                "your_data_path/star/Charades_v1_480/",
                "video",
                True,
            ),  # has start & end
            "Action Prediction": (
                "action_prediction.json",
                "your_data_path/star/Charades_v1_480/",
                "video",
                True,
            ),  # has start & end
            "Action Antonym": (
                "action_antonym.json",
                "your_data_path/ssv2_video/",
                "video",
                False,
            ),
            "Fine-grained Action": (
                "fine_grained_action.json",
                "your_data_path/Moments_in_Time_Raw/videos/",
                "video",
                False,
            ),
            "Unexpected Action": (
                "unexpected_action.json",
                "your_data_path/FunQA_test/test/",
                "video",
                False,
            ),
            "Object Existence": (
                "object_existence.json",
                "your_data_path/clevrer/video_validation/",
                "video",
                False,
            ),
            "Object Interaction": (
                "object_interaction.json",
                "your_data_path/star/Charades_v1_480/",
                "video",
                True,
            ),  # has start & end
            "Object Shuffle": (
                "object_shuffle.json",
                "your_data_path/perception/videos/",
                "video",
                False,
            ),
            "Moving Direction": (
                "moving_direction.json",
                "your_data_path/clevrer/video_validation/",
                "video",
                False,
            ),
            "Action Localization": (
                "action_localization.json",
                "your_data_path/sta/sta_video/",
                "video",
                True,
            ),  # has start & end
            "Scene Transition": (
                "scene_transition.json",
                "your_data_path/scene_qa/video/",
                "video",
                False,
            ),
            "Action Count": (
                "action_count.json",
                "your_data_path/perception/videos/",
                "video",
                False,
            ),
            "Moving Count": (
                "moving_count.json",
                "your_data_path/clevrer/video_validation/",
                "video",
                False,
            ),
            "Moving Attribute": (
                "moving_attribute.json",
                "your_data_path/clevrer/video_validation/",
                "video",
                False,
            ),
            "State Change": (
                "state_change.json",
                "your_data_path/perception/videos/",
                "video",
                False,
            ),
            "Fine-grained Pose": (
                "fine_grained_pose.json",
                "your_data_path/nturgbd/",
                "video",
                False,
            ),
            "Character Order": (
                "character_order.json",
                "your_data_path/perception/videos/",
                "video",
                False,
            ),
            "Egocentric Navigation": (
                "egocentric_navigation.json",
                "your_data_path/vlnqa/",
                "video",
                False,
            ),
            "Episodic Reasoning": (
                "episodic_reasoning.json",
                "your_data_path/tvqa/frames_fps3_hq/",
                "frame",
                True,
            ),  # has start & end, read frame
            "Counterfactual Inference": (
                "counterfactual_inference.json",
                "your_data_path/clevrer/video_validation/",
                "video",
                False,
            ),
        }
        super().__init__(dataset=dataset, nframe=nframe, fps=fps)

    @classmethod
    def supported_datasets(cls):
        """Return the dataset names this class can serve."""
        return ["MVBench"]

    def prepare_dataset(self, dataset_name="MVBench", repo_id="OpenGVLab/MVBench"):
        """Download (if needed) and index the dataset.

        Returns a dict with the dataset root and the TSV data-file path.
        A cached copy is reused when its TSV matches ``self.MD5`` and every
        referenced video exists on disk.
        """

        def check_integrity(pth):
            # A cached copy is valid iff the TSV exists, its MD5 matches,
            # and every referenced video file is present.
            data_file = osp.join(pth, f"{dataset_name}.tsv")

            if not os.path.exists(data_file):
                return False

            if md5(data_file) != self.MD5:
                return False

            data = load(data_file)
            for idx, item in data.iterrows():
                if not osp.exists(osp.join(pth, item["prefix"], item["video"])):
                    return False
            return True

        if modelscope_flag_set():
            repo_id = "modelscope/MVBench"

        cache_path = get_cache_path(repo_id, branch="main")
        if cache_path is not None and check_integrity(cache_path):
            dataset_path = cache_path
        else:

            def unzip_hf_zip(pth):
                # Extract every per-source ZIP archive under <pth>/video/ in place.
                pth = os.path.join(pth, "video/")
                for filename in os.listdir(pth):
                    if filename.endswith(".zip"):
                        # Build the full path of the archive
                        zip_path = os.path.join(pth, filename)

                        # Extract the ZIP archive in place
                        with zipfile.ZipFile(zip_path, "r") as zip_ref:
                            zip_ref.extractall(pth)

            def generate_tsv(pth):
                # Build the TSV index from the per-task annotation JSONs,
                # skipping regeneration when a valid TSV already exists.
                data_file = osp.join(pth, f"{dataset_name}.tsv")
                if os.path.exists(data_file) and md5(data_file) == self.MD5:
                    return
                json_data_dir = os.path.join(pth, "json")
                self.data_list = []
                for k, v in self.type_data_list.items():
                    with open(os.path.join(json_data_dir, v[0]), "r") as f:
                        json_data = json.load(f)
                    for data in json_data:
                        if os.path.exists(
                            os.path.join(
                                pth,
                                v[1].replace("your_data_path", "video"),
                                data["video"],
                            )
                        ):
                            self.data_list.append(
                                {
                                    "task_type": k,
                                    "prefix": v[1].replace("your_data_path", "video"),
                                    "data_type": v[2],
                                    "bound": v[3],
                                    "start": (
                                        data["start"]
                                        if "start" in data.keys()
                                        else None
                                    ),
                                    "end": (
                                        data["end"] if "end" in data.keys() else None
                                    ),
                                    "video": data["video"],
                                    "question": data["question"],
                                    "answer": data["answer"],
                                    "candidates": data["candidates"],
                                }
                            )
                        else:
                            # A missing video makes the benchmark incomplete;
                            # abort rather than silently dropping samples.
                            print(
                                "NTURGB-D zip file is removed according to MVBench, you can view it at "
                                "https://huggingface.co/datasets/OpenGVLab/MVBench for detailed reason."
                            )
                            raise Exception(
                                f"{os.path.join(v[1].replace('your_data_path', 'video'), data['video'])} does not exist"
                            )

                data_df = pd.DataFrame(self.data_list)
                data_df = data_df.assign(index=range(len(data_df)))
                data_df.to_csv(data_file, sep="\t", index=False)

            def move_files(pth):
                # Flatten the extra "video/data0613/<a>/<b>/..." layer from the
                # HF archive into the expected "video/<a>/<b>/..." layout.
                src_folder = os.path.join(pth, "video/data0613")
                if not os.path.exists(src_folder):
                    return
                for subdir in os.listdir(src_folder):
                    subdir_path = os.path.join(src_folder, subdir)
                    if os.path.isdir(subdir_path):
                        for subsubdir in os.listdir(subdir_path):
                            subsubdir_path = os.path.join(subdir_path, subsubdir)
                            if os.path.isdir(subsubdir_path):
                                for item in os.listdir(subsubdir_path):
                                    item_path = os.path.join(subsubdir_path, item)
                                    target_folder = os.path.join(
                                        pth, "video", subdir, subsubdir
                                    )
                                    if not os.path.exists(target_folder):
                                        os.makedirs(target_folder)
                                    target_path = os.path.join(target_folder, item)
                                    try:
                                        shutil.move(item_path, target_path)
                                    except Exception as e:
                                        # Best-effort move: report and continue.
                                        print(
                                            f"Error moving {item_path} to {target_path}: {e}"
                                        )

            if modelscope_flag_set():
                from modelscope import dataset_snapshot_download

                dataset_path = dataset_snapshot_download(
                    dataset_id=repo_id, revision="master"
                )
            else:
                # NOTE(review): HUGGINGFACE_TOKEN may be unset (None); login
                # behavior in that case depends on huggingface_hub — confirm.
                hf_token = os.environ.get("HUGGINGFACE_TOKEN")
                huggingface_hub.login(hf_token)
                dataset_path = snapshot_download(repo_id=repo_id, repo_type="dataset")
            unzip_hf_zip(dataset_path)
            move_files(dataset_path)
            generate_tsv(dataset_path)

        data_file = osp.join(dataset_path, f"{dataset_name}.tsv")

        # Dispatch table from a sample's data_type to its frame reader.
        self.decord_method = {
            "video": self.read_video,
            "gif": self.read_gif,
            "frame": self.read_frame,
        }

        # MVBench always samples 8 frames (3 fps for frame dirs), overriding
        # whatever was passed to the constructor.
        self.nframe = 8
        self.frame_fps = 3

        # transform: stack the sampled PIL frames into one torch tensor
        self.transform = T.Compose([Stack(), ToTorchFormatTensor()])

        return dict(root=dataset_path, data_file=data_file)

    def get_index(self, bound, fps, max_frame, first_idx=0):
        """Return ``self.num_segments`` frame indices sampled uniformly.

        ``bound`` is an optional (start, end) pair in seconds; when absent,
        the whole clip is used. Each index is the midpoint of one of the
        equal-length segments spanning [start_idx, end_idx].
        """
        if bound:
            start, end = bound[0], bound[1]
        else:
            start, end = -100000, 100000
        start_idx = max(first_idx, round(start * fps))
        end_idx = min(round(end * fps), max_frame)
        seg_size = float(end_idx - start_idx) / self.num_segments
        frame_indices = np.array(
            [
                int(start_idx + (seg_size / 2) + np.round(seg_size * idx))
                for idx in range(self.num_segments)
            ]
        )
        return frame_indices

    def read_video(self, video_path, bound=None):
        """Decode a video file with decord and return stacked frame tensors."""
        vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
        max_frame = len(vr) - 1
        fps = float(vr.get_avg_fps())

        images_group = list()
        frame_indices = self.get_index(bound, fps, max_frame, first_idx=0)
        for frame_index in frame_indices:
            img = Image.fromarray(vr[frame_index].asnumpy())
            images_group.append(img)
        torch_imgs = self.transform(images_group)
        return torch_imgs

    def read_gif(self, video_path, bound=None, fps=25):
        """Decode a GIF with imageio (assumed ``fps``) and return frame tensors."""
        gif = imageio.get_reader(video_path)
        max_frame = len(gif) - 1

        images_group = list()
        frame_indices = self.get_index(bound, fps, max_frame, first_idx=0)
        for index, frame in enumerate(gif):
            if index in frame_indices:
                img = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
                img = Image.fromarray(img)
                images_group.append(img)
        torch_imgs = self.transform(images_group)
        return torch_imgs

    def read_frame(self, video_path, bound=None, fps=3):
        """Load pre-extracted ``NNNNN.jpg`` frames from a directory."""
        max_frame = len(os.listdir(video_path))
        images_group = list()
        frame_indices = self.get_index(
            bound, fps, max_frame, first_idx=1
        )  # frame_idx starts from 1
        for frame_index in frame_indices:
            img = Image.open(os.path.join(video_path, f"{frame_index:05d}.jpg"))
            images_group.append(img)
        torch_imgs = self.transform(images_group)
        return torch_imgs

    def save_video_frames(self, imgs, video_name, frames):
        """Split the stacked tensor into ``frames`` chunks and save each as an image.

        Existing frame files are kept; only missing ones are written.
        """
        frame_paths = self.frame_paths(video_name)
        flag = np.all([osp.exists(p) for p in frame_paths])

        if not flag:
            # Each saved image corresponds to one equal-size chunk of imgs.
            block_size = imgs.size(0) // frames
            split_tensors = torch.split(imgs, block_size)
            to_pil = transforms.ToPILImage()
            images = [to_pil(arr) for arr in split_tensors]
            for im, pth in zip(images, frame_paths):
                if not osp.exists(pth):
                    im.save(pth)

        return frame_paths

    def qa_template(self, data):
        """Format one row into an MCQ prompt and its lettered answer.

        Returns (question_text, "(X) answer") where X is the answer's letter.
        """
        question = f"Question: {data['question']}\n"
        question += "Options:\n"
        answer = data["answer"]
        answer_idx = -1
        # NOTE: candidates is a stringified Python list in the TSV; eval is
        # acceptable only because the data file is locally generated.
        for idx, c in enumerate(eval(data["candidates"])):
            question += f"({chr(ord('A') + idx)}) {c}\n"
            if c == answer:
                answer_idx = idx
        question = question.rstrip()
        answer = f"({chr(ord('A') + answer_idx)}) {answer}"
        return question, answer

    def load_into_video_and_process(self, line):
        """Return an mp4 path for the sample, transcoding / trimming as needed.

        GIF/webm inputs are re-encoded to mp4; frame directories are joined
        into an mp4; bounded samples are additionally cut to [start, end].
        """
        try:
            from moviepy.editor import VideoFileClip, ImageSequenceClip
        except ImportError:
            raise ImportError(
                'MoviePy is not installed, please install it by running "pip install moviepy==1.0.3"'
            )
        video_path = os.path.join(self.data_root, line["prefix"], line["video"])

        if line["data_type"] in ["gif"] or os.path.splitext(video_path)[1] in [".webm"]:
            processed_video_path = video_path.replace(
                os.path.splitext(video_path)[1], ".mp4"
            )
            if not os.path.exists(processed_video_path):
                # using MoviePy to transform GIF, webm into mp4 format
                gif_clip = VideoFileClip(video_path)
                gif_clip.write_videofile(processed_video_path, codec="libx264")
                gif_clip.close()
        elif line["data_type"] in ["frame"]:
            input_images = os.path.join(video_path, "*.jpg")
            processed_video_path = f"{video_path}.mp4"
            if not os.path.exists(processed_video_path):
                # using MoviePy to transform images into mp4
                image_files = sorted(glob.glob(input_images))
                image_clip = ImageSequenceClip(image_files, fps=self.frame_fps)
                image_clip.write_videofile(processed_video_path, codec="libx264")
                image_clip.close()
        else:
            processed_video_path = video_path

        if line["bound"]:
            base_name, suffix = os.path.splitext(processed_video_path)
            output_video_path = f"{base_name}_processed{suffix}"
            if not os.path.exists(output_video_path):
                video_clip = VideoFileClip(processed_video_path)
                clip = video_clip.subclip(
                    line["start"], min(line["end"], video_clip.duration)
                )
                clip.write_videofile(output_video_path)
                clip.close()
        else:
            output_video_path = processed_video_path

        return output_video_path

    def save_video_into_images(self, line):
        """Sample ``self.nframe`` frames for the sample and return their paths."""
        bound = None
        if line["bound"]:
            bound = (
                line["start"],
                line["end"],
            )
        video_path = os.path.join(self.data_root, line["prefix"], line["video"])
        decord_method = self.decord_method[line["data_type"]]
        # get_index reads num_segments; set it before decoding.
        self.num_segments = self.nframe
        torch_imgs = decord_method(video_path, bound)
        img_frame_paths = self.save_video_frames(
            torch_imgs, line["video"], self.num_segments
        )
        return img_frame_paths

    def build_prompt(self, line, video_llm):
        """Build the multimodal message list for one sample.

        ``video_llm`` selects between attaching the (processed) video file
        and attaching the individually sampled frames as images.
        """
        if self.fps > 0:
            raise ValueError(
                "MVBench does not support fps setting, please transfer to MVBench_MP4!"
            )
        if isinstance(line, int):
            assert line < len(self)
            line = self.data.iloc[line]

        question, answer = self.qa_template(line)
        message = [dict(type="text", value=self.SYS, role="system")]
        message.append(dict(type="text", value=question))
        if video_llm:
            new_video_path = self.load_into_video_and_process(line)
            message.append(dict(type="video", value=new_video_path))
        else:
            img_frame_paths = self.save_video_into_images(line)
            for im in img_frame_paths:
                message.append(dict(type="image", value=im))
        message.append(dict(type="text", value="\nOnly give the best option."))
        message.append(dict(type="text", value="Best option:(", role="assistant"))
        return message

    @classmethod
    def evaluate(cls, eval_file, **judge_kwargs):
        """Score predictions in ``eval_file`` and return the per-dimension rating.

        Uses an LLM judge when available, otherwise exact matching. Rows with
        a failed judge answer are scored -1. Results are cached in the
        ``*_score.xlsx`` / ``*_rating.json`` siblings of ``eval_file``.
        """
        assert eval_file.endswith(".xlsx"), "data file should be an xlsx file"

        tmp_file = eval_file.replace(".xlsx", "_tmp.pkl")
        tgt_file = eval_file.replace(".xlsx", "_rating.json")
        score_file = eval_file.replace(".xlsx", "_score.xlsx")

        if not osp.exists(score_file):
            model = judge_kwargs.setdefault("model", "chatgpt-0125")
            assert model in ["chatgpt-0125", "exact_matching", "gpt-4-0125"]

            if model == "exact_matching":
                model = None
            elif gpt_key_set():
                model = build_judge(**judge_kwargs)
                if not model.working():
                    warnings.warn(
                        "OPENAI API is not working properly, will use exact matching for evaluation"
                    )
                    warnings.warn(DEBUG_MESSAGE)
                    model = None
            else:
                warnings.warn(
                    "OPENAI_API_KEY is not set properly, will use exact matching for evaluation"
                )
                model = None
            # NOTE(review): `res` is loaded/filtered but never read afterwards;
            # likely a leftover from a resumable-evaluation path — confirm
            # before removing.
            res = {} if not osp.exists(tmp_file) else load(tmp_file)
            res = {k: v for k, v in res.items() if FAIL_MSG not in v}

            data = load(eval_file)
            data_un = data[~pd.isna(data["prediction"])]

            for idx in data_un["index"]:
                ans = data.loc[data["index"] == idx, "answer"].values[0]
                pred = data.loc[data["index"] == idx, "prediction"].values[0]
                options = eval(data.loc[data["index"] == idx, "candidates"].values[0])
                answer_idx = -1
                for opt_idx, c in enumerate(options):
                    if c == ans:
                        answer_idx = opt_idx
                ans = f"({chr(ord('A') + answer_idx)}) {ans}"
                input_item = data.loc[data["index"] == idx].to_dict(orient="records")[0]
                # Expose options as A/B/C/... keys and turn the answer into a letter.
                for opt_idx, option_content in enumerate(eval(input_item["candidates"])):
                    input_item[chr(ord("A") + opt_idx)] = option_content
                    if option_content == input_item["answer"]:
                        input_item["answer"] = chr(ord("A") + opt_idx)

                if FAIL_MSG in pred:
                    # NOTE(review): `.loc[idx, ...]` indexes by DataFrame label;
                    # this assumes the frame's index equals the "index" column.
                    data.loc[idx, "score"] = -1
                else:
                    data.loc[idx, "score"] = int(
                        check_ans_with_model(pred, ans, model, input_item, "MVBench")
                    )

            rejected = [x for x in data["score"] if x == -1]

            print(
                f"Among {len(data)} questions, failed to obtain prediction for {len(data) - len(data_un)} questions, "
                f"failed to obtain the score for another {len(rejected)} questions. "
                f"Those questions will be counted as -1 score in ALL rating, and will not be counted in VALID rating."
            )

            dump(data, score_file)

        rating = get_dimension_rating(score_file)
        dump(rating, tgt_file)
        return rating


class MVBench_MP4(VideoBaseDataset):
    """MP4-only variant of MVBench (the ``video`` branch of OpenGVLab/MVBench).

    All samples are mp4 files, so both fixed-frame (``nframe``) and
    fps-based sampling are supported, and videos can be passed to video
    LLMs without transcoding.
    """

    # MD5 of the generated TSV index; used to validate a cached copy.
    MP4_MD5 = "5c8c6f8b7972c2de65a629590f7c42f5"
    # System prompt prepended to every question.
    SYS = """Carefully watch the video and pay attention to the cause and sequence of events, \
the detail and movement of objects, and the action and pose of persons. \
Based on your observations, select the best option that accurately addresses the question.
"""
    TYPE = "Video-MCQ"

    def __init__(self, dataset="MVBench_MP4", nframe=0, fps=-1):
        super().__init__(dataset=dataset, nframe=nframe, fps=fps)

    @classmethod
    def supported_datasets(cls):
        """Return the dataset names this class can serve."""
        return ["MVBench_MP4"]

    def prepare_dataset(self, dataset_name="MVBench_MP4", repo_id="OpenGVLab/MVBench"):
        """Download (if needed) the ``video`` branch and build the TSV index.

        Returns a dict with the dataset root and the TSV data-file path.
        """

        def check_integrity(pth):
            # A cached copy is valid iff the TSV exists, its MD5 matches,
            # and every referenced video file is present.
            data_file = osp.join(pth, f"{dataset_name}.tsv")

            if not os.path.exists(data_file):
                return False

            if md5(data_file) != self.MP4_MD5:
                return False

            data = load(data_file)
            for idx, item in data.iterrows():
                if not osp.exists(osp.join(pth, item["prefix"], item["video"])):
                    return False
            return True

        if modelscope_flag_set():
            repo_id = "modelscope/MVBench"

        cache_path = get_cache_path(repo_id, branch="video")
        if cache_path is not None and check_integrity(cache_path):
            dataset_path = cache_path
        else:

            def generate_tsv(pth):
                # Build the TSV index from test.json; skip when a valid TSV
                # already exists.
                data_file = osp.join(pth, f"{dataset_name}.tsv")
                if os.path.exists(data_file) and md5(data_file) == self.MP4_MD5:
                    return
                # Use the `pth` parameter (was the enclosing `dataset_path`,
                # which only worked because the caller passed the same value).
                json_data_path = os.path.join(pth, "test.json")
                json_data = load(json_data_path)
                root_data_dict = json_data["root"]
                self.data_list = []
                for k, v in json_data["meta"].items():
                    for item in v:
                        self.data_list.append(
                            {
                                "task_type": k,
                                "prefix": root_data_dict[k],
                                "video": item["video"],
                                "question": item["question"],
                                "answer": item["answer"],
                                "candidates": item["candidates"],
                            }
                        )
                data_df = pd.DataFrame(self.data_list)
                data_df = data_df.assign(index=range(len(data_df)))
                data_df.to_csv(data_file, sep="\t", index=False)

            if modelscope_flag_set():
                from modelscope import dataset_snapshot_download

                dataset_path = dataset_snapshot_download(
                    dataset_id=repo_id, revision="video"
                )
            else:
                # NOTE(review): HUGGINGFACE_TOKEN may be unset (None); login
                # behavior in that case depends on huggingface_hub — confirm.
                hf_token = os.environ.get("HUGGINGFACE_TOKEN")
                huggingface_hub.login(hf_token)
                dataset_path = snapshot_download(
                    repo_id=repo_id, repo_type="dataset", revision="video"
                )
            generate_tsv(dataset_path)

        data_file = osp.join(dataset_path, f"{dataset_name}.tsv")

        # transform: stack the sampled PIL frames into one torch tensor
        self.transform = T.Compose([Stack(), ToTorchFormatTensor()])

        return dict(root=dataset_path, data_file=data_file)

    def qa_template(self, data):
        """Format one row into an MCQ prompt and its lettered answer.

        Returns (question_text, "(X) answer") where X is the answer's letter.
        """
        question = f"Question: {data['question']}\n"
        question += "Options:\n"
        answer = data["answer"]
        answer_idx = -1
        # NOTE: candidates is a stringified Python list in the TSV; eval is
        # acceptable only because the data file is locally generated.
        for idx, c in enumerate(eval(data["candidates"])):
            question += f"({chr(ord('A') + idx)}) {c}\n"
            if c == answer:
                answer_idx = idx
        question = question.rstrip()
        answer = f"({chr(ord('A') + answer_idx)}) {answer}"
        return question, answer

    def get_index_by_frame(self, max_frame):
        """Return ``self.num_segments`` indices at uniform segment midpoints."""
        seg_size = float(max_frame) / self.num_segments
        frame_indices = np.array(
            [
                int((seg_size / 2) + np.round(seg_size * idx))
                for idx in range(self.num_segments)
            ]
        )
        return frame_indices

    def get_index_by_fps(self, vid, fps):
        """Return indices sampling ``vid`` at the requested ``fps``.

        Side effect: updates ``self.num_segments`` to the number of sampled
        frames, which downstream frame-saving relies on.
        """
        total_frames = len(vid)
        video_fps = vid.get_avg_fps()
        total_duration = total_frames / video_fps
        required_frames = int(total_duration * fps)
        step_size = video_fps / fps
        frame_indices = np.array([int(i * step_size) for i in range(required_frames)])
        self.num_segments = len(frame_indices)
        return frame_indices

    def read_video(self, video_path):
        """Decode a video and return the stacked sampled-frame tensor.

        Sampling is frame-count based when ``self.fps < 0``, fps based
        otherwise.
        """
        vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
        max_frame = len(vr) - 1

        images_group = list()
        if self.fps < 0:
            frame_indices = self.get_index_by_frame(max_frame)
        else:
            frame_indices = self.get_index_by_fps(vr, self.fps)

        for frame_index in frame_indices:
            img = Image.fromarray(vr[frame_index].asnumpy())
            images_group.append(img)
        torch_imgs = self.transform(images_group)
        return torch_imgs

    def save_video_frames(self, imgs, video_name, frames):
        """Split the stacked tensor into ``frames`` chunks and save each as an image.

        Existing frame files are kept; only missing ones are written.
        """
        if self.fps > 0:
            frame_paths = self.frame_paths_fps(video_name, frames)
        else:
            frame_paths = self.frame_paths(video_name)
        flag = np.all([osp.exists(p) for p in frame_paths])

        if not flag:
            # Each saved image corresponds to one equal-size chunk of imgs.
            block_size = imgs.size(0) // frames
            split_tensors = torch.split(imgs, block_size)
            to_pil = transforms.ToPILImage()
            images = [to_pil(arr) for arr in split_tensors]
            for im, pth in zip(images, frame_paths):
                if not osp.exists(pth):
                    im.save(pth)

        return frame_paths

    def save_video_into_images(self, line):
        """Sample frames for the sample and return their file paths."""
        video_path = os.path.join(self.data_root, line["prefix"], line["video"])
        if self.fps <= 0:
            self.num_segments = self.nframe
        else:
            # Placeholder; get_index_by_fps overwrites it during read_video.
            self.num_segments = 0
        torch_imgs = self.read_video(video_path)
        img_frame_paths = self.save_video_frames(
            torch_imgs, line["video"], self.num_segments
        )
        return img_frame_paths

    def build_prompt(self, line, video_llm):
        """Build the multimodal message list for one sample.

        ``video_llm`` selects between attaching the mp4 directly and
        attaching the individually sampled frames as images.
        """
        if isinstance(line, int):
            assert line < len(self)
            line = self.data.iloc[line]

        question, answer = self.qa_template(line)
        message = [dict(type="text", value=self.SYS, role="system")]
        message.append(dict(type="text", value=question))
        video_path = os.path.join(self.data_root, line["prefix"], line["video"])
        if video_llm:
            message.append(dict(type="video", value=video_path))
        else:
            img_frame_paths = self.save_video_into_images(line)
            for im in img_frame_paths:
                message.append(dict(type="image", value=im))
        message.append(dict(type="text", value="\nOnly give the best option."))
        message.append(dict(type="text", value="Best option:(", role="assistant"))
        return message

    @classmethod
    def evaluate(cls, eval_file, **judge_kwargs):
        """Score predictions in ``eval_file`` and return the per-dimension rating.

        Uses an LLM judge when available, otherwise exact matching. Rows with
        a failed judge answer are scored -1. Results are cached in the
        ``*_score.xlsx`` / ``*_rating.json`` siblings of ``eval_file``.
        """
        assert eval_file.endswith(".xlsx"), "data file should be an xlsx file"

        tmp_file = eval_file.replace(".xlsx", "_tmp.pkl")
        tgt_file = eval_file.replace(".xlsx", "_rating.json")
        score_file = eval_file.replace(".xlsx", "_score.xlsx")

        if not osp.exists(score_file):
            model = judge_kwargs.setdefault("model", "chatgpt-0125")
            assert model in ["chatgpt-0125", "exact_matching", "gpt-4-0125"]

            if model == "exact_matching":
                model = None
            elif gpt_key_set():
                model = build_judge(**judge_kwargs)
                if not model.working():
                    warnings.warn(
                        "OPENAI API is not working properly, will use exact matching for evaluation"
                    )
                    warnings.warn(DEBUG_MESSAGE)
                    model = None
            else:
                warnings.warn(
                    "OPENAI_API_KEY is not set properly, will use exact matching for evaluation"
                )
                model = None
            # NOTE(review): `res` is loaded/filtered but never read afterwards;
            # likely a leftover from a resumable-evaluation path — confirm
            # before removing.
            res = {} if not osp.exists(tmp_file) else load(tmp_file)
            res = {k: v for k, v in res.items() if FAIL_MSG not in v}

            data = load(eval_file)
            data_un = data[~pd.isna(data["prediction"])]

            for idx in data_un["index"]:
                ans = data.loc[data["index"] == idx, "answer"].values[0]
                pred = data.loc[data["index"] == idx, "prediction"].values[0]
                options = eval(data.loc[data["index"] == idx, "candidates"].values[0])
                answer_idx = -1
                for opt_idx, c in enumerate(options):
                    if c == ans:
                        answer_idx = opt_idx
                ans = f"({chr(ord('A') + answer_idx)}) {ans}"
                input_item = data.loc[data["index"] == idx].to_dict(orient="records")[0]
                # Expose options as A/B/C/... keys and turn the answer into a letter.
                for opt_idx, option_content in enumerate(eval(input_item["candidates"])):
                    input_item[chr(ord("A") + opt_idx)] = option_content
                    if option_content == input_item["answer"]:
                        input_item["answer"] = chr(ord("A") + opt_idx)

                if FAIL_MSG in pred:
                    # NOTE(review): `.loc[idx, ...]` indexes by DataFrame label;
                    # this assumes the frame's index equals the "index" column.
                    data.loc[idx, "score"] = -1
                else:
                    data.loc[idx, "score"] = int(
                        check_ans_with_model(
                            pred, ans, model, input_item, "MVBench_MP4"
                        )
                    )

            rejected = [x for x in data["score"] if x == -1]

            print(
                f"Among {len(data)} questions, failed to obtain prediction for {len(data) - len(data_un)} questions, "
                f"failed to obtain the score for another {len(rejected)} questions. "
                f"Those questions will be counted as -1 score in ALL rating, and will not be counted in VALID rating."
            )

            dump(data, score_file)

        rating = get_dimension_rating(score_file)
        dump(rating, tgt_file)
        return rating
